# Working directory: set `data_dir` to the folder that contains d2d.csv, or
# leave it empty to read from the current working directory.
# An empty path is skipped on purpose, because setwd("") raises
# "cannot change working directory" and would stop the script here.
data_dir <- ""
if (nzchar(data_dir)) {
  setwd(data_dir)
}

library(dplyr)
library(haven)
library(MatchIt)

# Replication files: Door-to-door campaigns in an electoral autocracy: Evidence from Hungary

## VARIABLE TRANSFORMATION

d2d <- read.csv("d2d.csv")

# 1. Explanatory variables

d2d <- d2d %>%
  mutate(
    # Treatment: 1 if the precinct was visited (a workday is recorded), else 0
    visited = as.numeric(!is.na(workday)),
    # Observable 1: population density in precinct (voters per building),
    # taken as the reciprocal of the buildings-per-voter column, and its log
    vpb = 1 / hazszamok_szama_per_valasztok_sz,
    log_vpb = log(vpb)
  )

# Observable 2: 2018 general election results for parties in precinct,
# converted from proportions to percentages.
d2d <- d2d %>%
  mutate(
    # Aggregates first: they must be built from the raw proportions, i.e.
    # before jobbik_share_18 itself is rescaled further down.
    other_share_18 = 100 * (jobbik_share_18 + dk_share_18 + lmp_share_18 + mszp_share_18),
    left_share_18 = 100 * (dk_share_18 + mszp_share_18 + lmp_share_18),
    # Single-party shares and turnout, rescaled in place
    fidesz_share_18 = 100 * fidesz_share_18,
    momentum_share_18 = 100 * momentum_share_18,
    jobbik_share_18 = 100 * jobbik_share_18,
    turnout_18 = 100 * turnout_18
  )

d2d <- d2d %>%
  # Average penetration (share of precincts visited within town)
  group_by(city) %>%
  mutate(city_canv = mean(visited)) %>%
  ungroup() %>%
  # Local election district (alternative fixed effect to city)
  mutate(city_tevk = paste(city, tevk_19, sep = "_"))

## 2. Dependent variable

## Precinct vote share in 2019: based on valid (cast) votes, in percent.
## momentum_share_19 already exists as a proportion and is only rescaled;
## the remaining party shares are derived from the raw vote counts.
d2d <- d2d %>%
  mutate(
    momentum_share_19 = momentum_share_19 * 100,
    fidesz_share_19 = fidesz_19 / valid_19 * 100,
    jobbik_share_19 = jobbik_19 / valid_19 * 100,
    dk_share_19 = dk_19 / valid_19 * 100,
    mszp_share_19 = mszp_19 / valid_19 * 100,
    lmp_share_19 = lmp_19 / valid_19 * 100,
    mkkp_share_19 = mkkp_19 / valid_19 * 100,
    ## opposition parties minus momentum
    other_share_19 = dk_share_19 + jobbik_share_19 + mszp_share_19 +
      lmp_share_19 + mkkp_share_19,
    ## opposition parties minus momentum minus jobbik
    left_share_19 = dk_share_19 + mszp_share_19 + lmp_share_19 +
      mkkp_share_19
  )

## Precinct vote share in 2019: based on number of eligible voters in precinct,
## in percent (alternative outcome definition, suffix "share2").
d2d <- d2d %>%
  mutate(
    momentum_share2_19 = momentum_19 / eligible_v_19 * 100,
    fidesz_share2_19 = fidesz_19 / eligible_v_19 * 100,
    jobbik_share2_19 = jobbik_19 / eligible_v_19 * 100,
    dk_share2_19 = dk_19 / eligible_v_19 * 100,
    mszp_share2_19 = mszp_19 / eligible_v_19 * 100,
    lmp_share2_19 = lmp_19 / eligible_v_19 * 100,
    mkkp_share2_19 = mkkp_19 / eligible_v_19 * 100,
    ## opposition parties minus momentum
    other_share2_19 = dk_share2_19 + jobbik_share2_19 +
      mszp_share2_19 + lmp_share2_19 + mkkp_share2_19
  )


## Analysis datasets:
## 1. Full sample (with or without outliers)
d2df <- dplyr::select(
  d2d,
  all_of(c(
    "precinct_id",
    ## fixed-effects
    "city", "city_tevk",
    ## treatment and observables
    "visited", "log_vpb", "momentum_share_18",
    ## covariates
    "fidesz_share_18", "other_share_18", "left_share_18",
    "jobbik_share_18", "turnout_18",
    ## outcome variables
    "momentum_share_19", "momentum_share2_19",
    "fidesz_share_19", "fidesz_share2_19",
    "other_share_19", "other_share2_19",
    "left_share_19", "jobbik_share_19",
    ## used to restrict to cities with at least one treated precinct
    "city_canv"
  ))
)

## Removing outliers: precincts whose 2018 turnout exceeds 100%
## (voters who voted somewhere other than where they live)
d2df_to_max <- dplyr::filter(d2df, turnout_18 <= 100)

## 2. Restricted sample: observations from localities (city) in which
## at least one precinct was visited
d2df_r <- dplyr::filter(d2df, city_canv > 0) ## with outliers
d2df_to_max_r <- dplyr::filter(d2df_to_max, city_canv > 0) ## no outliers

## 3. Matched sample: coarsened exact matching (MatchIt, method = "cem")
## Treatment is `visited`; three variants are built that differ in the input
## sample and in whether 2018 turnout enters the matching model.
## NOTE(review): match.data() is called without a `data` argument, so it
## presumably recovers the data from the stored matchit() call -- keep the
## `data =` arguments as plain references to the data frames above.

## Variant 1: sample without turnout outliers, turnout in matching model
match.l1 <- matchit(visited ~ log_vpb + turnout_18 + momentum_share_18 +
                      fidesz_share_18 + other_share_18,
                    data = d2df_to_max, method = "cem")
match_dat1 <- match.data(match.l1) ## without outliers

## Variant 2: full sample (outliers kept), turnout in matching model
match.l2 <- matchit(visited ~ log_vpb + turnout_18 + momentum_share_18 +
                      fidesz_share_18 + other_share_18,
                    data = d2df, method = "cem")
match_dat2 <- match.data(match.l2) ## with outliers, with turnout in matching model

## Variant 3: full sample (outliers kept), turnout dropped from matching model
match.l3 <- matchit(visited ~ log_vpb + momentum_share_18 +
                      fidesz_share_18 + other_share_18,
                    data = d2df, method = "cem")
match_dat3 <- match.data(match.l3) ## with outliers, without turnout in matching model

## Final files for alternative models:
##write.csv(d2df, "d2d_full.csv")
##write.csv(d2df_r, "d2d_restricted.csv")
##write.csv(d2df_to_max, "d2d_full_nooutlier.csv")
##write.csv(d2df_to_max_r, "d2d_restricted_nooutlier.csv")
##write.csv(match_dat1, "d2d_match_nooutlier.csv")
##write.csv(match_dat2, "d2d_match.csv")
##write.csv(match_dat3, "d2d_match_noturnout.csv")

## END OF SCRIPT